1 Document Setup

library(plotly)
library(DataExplorer)
library(esquisse)
library(data.table)
library(psych)
library(MASS)
library(klaR)
library(tidyverse)
library(magrittr)
library(readr)
library(NbClust)
library(ggmap)
library(maps)
library(htmlwidgets)

2 Read in & Join Michelin Datasets

# Column types shared by all three Michelin CSV exports. Coordinates and zip
# codes are read as text here; the coordinates are converted to numeric only
# after the three files have been combined.
michelin_col_types <- cols(latitude = col_character(),
                           longitude = col_character(),
                           year = col_integer(),
                           zipCode = col_character())

# Read one Michelin CSV export and tag every row with its star rating.
#   path:    path to the CSV file
#   n_stars: star rating written to a new `stars` column
# Returns the parsed tibble with the extra `stars` column.
read_michelin <- function(path, n_stars) {
  read_csv(path, col_types = michelin_col_types) %>%
    mutate(stars = n_stars)
}

one_star_michelin_restaurants <- read_michelin("one-star-michelin-restaurants.csv", 1)
two_star_michelin_restaurants <- read_michelin("two-stars-michelin-restaurants.csv", 2)
three_star_michelin_restaurants <- read_michelin("three-stars-michelin-restaurants.csv", 3)

# Stack the one-, two- and three-star tables into a single data set, convert
# the categorical columns to factors and the coordinates to numbers.
# `price` arrives as a string of "$" signs, so its length is the price tier.
michelin <- bind_rows(one_star_michelin_restaurants,
                      two_star_michelin_restaurants,
                      three_star_michelin_restaurants) %>%
  mutate(
    city = factor(city),
    region = factor(region),
    zipCode = factor(zipCode),
    cuisine = factor(cuisine),
    price = factor(str_length(price)),
    stars = factor(stars),
    latitude = as.numeric(latitude),
    longitude = as.numeric(longitude),
    # Below is a simplified cuisine listing, as a result of restaurants
    # defining their type of food as things like "innovative" and
    # "classic cuisine". mutate() evaluates its arguments in order, so
    # `cuisine` here is already the factor created above.
    cuisine.collapsed = fct_collapse(
      cuisine,
      American = c("American", "Californian"),
      Australian = c("Australian"),
      Chinese = c("Cantonese", "Hang Zhou", "Sichuan-Huai Yang",
                  "Cantonese Roast Meats", "Fujian", "Hunanese and Sichuan",
                  "Chinese", "Shanghainese", "Taiwanese", "Dim Sum", "Sichuan",
                  "Taizhou", "Noodles and congee"),
      French = c("Classic French", "French contemporary", "Creative French",
                 "French", "Modern French"),
      British = c("Creative British", "Traditional British", "Modern British"),
      European = c("European", "Austrian", "European contemporary", "Danish",
                   "Finnish"),
      Meats = c("Meats and grills", "Barbecue"),
      Modern = c("modern", "Contemporary", "creative", "Fusion",
                 "Modern cuisine", "Creative", "Gastropub", "Innovative"),
      Other = c("International", "Street Food",
                "Temple cuisine", "Seafood", "Vegetarian",
                "Classic cuisine", "Market cuisine", "Regional cuisine",
                "Steakhouse"),
      Japanese = c("Japanese contemporary", "Sushi", "Teppanyaki", "Japanese"),
      Moroccan = c("Moroccan"),
      Scandinavian = c("Scandinavian"),
      Asian = c("Asian", "Asian contemporary", "Asian influences"),
      Italian = c("Italian", "Italian contemporary"),
      Korean = c("Korean", "Korean contemporary"),
      Mediterranean = c("Mediterranean", "Mediterranean cuisine"),
      Thai = c("Southern Thai", "Thai", "Thai Contemporary"),
      Indian = c("Indian"),
      Malaysian = c("Peranakan"),
      Spanish = c("Spanish"),
      Mexican = c("Mexican")
    )
  )

head(michelin)
## # A tibble: 6 x 12
##   name   year latitude longitude city  region zipCode cuisine price url   stars
##   <chr> <int>    <dbl>     <dbl> <fct> <fct>  <fct>   <fct>   <fct> <chr> <fct>
## 1 Kili…  2019     47.3      10.2 Klei… Austr… 87568   Creati… 5     http… 1    
## 2 Pfef…  2019     47.8      13.1 Hall… Austr… 5300    Classi… 5     http… 1    
## 3 Essz…  2019     47.8      13.0 Salz… Austr… 5020    Creati… 5     http… 1    
## 4 Carp…  2019     47.8      13.0 Salz… Austr… 5020    Market… 5     http… 1    
## 5 Edva…  2019     48.2      16.4 Wien  Austr… 1010    Modern… 4     http… 1    
## 6 Das …  2019     48.2      16.4 Wien  Austr… 1020    Modern… 5     http… 1    
## # … with 1 more variable: cuisine.collapsed <fct>

3 K-Modes Clustering

# Numeric feature table used for clustering: one row per restaurant with the
# collapsed cuisine, the price tier and the star rating.
michelin.kmodes <- michelin %>%
  dplyr::select(cuisine.collapsed, price, stars) %>%
  mutate(
    # Cuisine has no numeric meaning, so it is represented by its level code;
    # the code is later mapped back through levels(michelin$cuisine.collapsed).
    cuisine.collapsed = as.numeric(cuisine.collapsed),
    # price and stars are factors with numeric labels. Decode the labels
    # rather than the level codes so the values stay correct even when a
    # tier is missing from the data.
    price = as.numeric(as.character(price)),
    stars = as.numeric(as.character(stars))
  )

# Evaluate candidate cluster counts from 2 to 20 using Ward clustering on
# Euclidean distances; NbClust prints a majority-rule recommendation.
NbClust(michelin.kmodes,
        distance = "euclidean",
        min.nc = 2,
        max.nc = 20,
        method = "ward.D")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 3 proposed 2 as the best number of clusters 
## * 4 proposed 3 as the best number of clusters 
## * 7 proposed 4 as the best number of clusters 
## * 1 proposed 5 as the best number of clusters 
## * 2 proposed 6 as the best number of clusters 
## * 1 proposed 17 as the best number of clusters 
## * 3 proposed 18 as the best number of clusters 
## * 2 proposed 20 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  4 
##  
##  
## *******************************************************************
## $All.index
##         KL        CH Hartigan     CCC    Scott    Marriot     TrCovW    TraceW
## 2   1.3679  731.7359 464.9049 40.7610 2678.303 1741830711 4705895.93 4530.4576
## 3   1.4050  842.5484 350.4133 27.2827 3283.952 1639561583 1754556.80 2711.4551
## 4  59.3874  961.5437 107.5604 30.2000 3717.357 1562346902  631079.75 1799.9837
## 5   0.0183  859.0576 312.7938 27.6781 3976.057 1682441369  416556.72 1557.5387
## 6   6.2031 1059.8120 111.6136 32.8812 4450.720 1223750354  133819.39 1071.7075
## 7   1.8378 1043.3315  84.4441 32.6902 4731.470 1112115971  122605.87  922.3007
## 8   1.7926 1014.6338  67.5305 32.1847 4939.855 1076261980   96703.74  821.4742
## 9   0.3089  982.0834  88.4125 31.5522 5328.530  778656832   86043.23  747.9522
## 10  2.0641  993.8443  63.9171 32.0367 5505.345  745370017   56754.19  662.5606
## 11  4.4886  982.8763  45.0739 31.9403 5650.046  732378809   43815.08  606.0137
## 12  0.2506  955.1042  21.0692 31.3949 5765.181  738527889   36554.08  568.5478
## 13  0.5327  902.9517  66.4582 30.1462 5811.845  810460244   33774.98  551.5341
## 14  3.0261  918.4780  21.7103 30.7582 5966.103  752848661   27343.70  502.5615
## 15  0.2189  880.3182  80.2699 29.8517 6022.490  796890642   25438.09  487.0348
## 16  2.4199  922.6114  49.3633 31.2288 6222.561  679884218   20923.24  435.6133
## 17  0.3103  929.5441 102.2960 31.5981 6341.840  646482976   18610.24  406.0905
## 18 33.1907 1011.3873  27.1355 33.9503 6627.715  480358586   14040.13  352.8524
## 19  1.4961  993.5232  21.9950 33.6677 6724.225  465822219   13109.87  339.2545
## 20  1.8611  971.5756  21.5028 33.2661 6775.070  479734222   11907.97  328.5640
##    Friedman    Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky
## 2   36.5514  17.2660 0.2434 0.8208     0.5656 0.4554 661.3258 2.0322    0.2250
## 3   60.2710  28.8491 0.1890 0.8305     0.5224 0.2343 450.9394 5.5229    0.2472
## 4   92.4656  43.4576 0.2315 0.6902     0.5399 0.7079 177.8703 0.7009    0.2360
## 5  100.5557  50.2222 0.2066 0.9837     0.3780 0.2970 283.9735 3.9954    0.2803
## 6  165.8452  72.9891 0.1868 0.8941     0.4042 0.6415 131.8936 0.9474    0.2643
## 7  205.6688  84.8129 0.1609 0.9373     0.4295 0.4094 278.3839 2.4429    0.2633
## 8  267.8373  95.2227 0.1499 0.9172     0.4321 0.5592 123.7745 1.3336    0.2470
## 9  277.6087 104.5829 0.1371 0.8929     0.4687 0.6505  60.7237 0.9068    0.2627
## 10 315.6959 118.0617 0.1605 1.0036     0.4540 0.6802  42.7850 0.7917    0.2544
## 11 327.8350 129.0779 0.1444 1.0453     0.4557 0.5741  45.9885 1.2427    0.2490
## 12 375.8978 137.5839 0.1385 1.0096     0.4784 0.0000      Inf    Inf    0.2389
## 13 378.0800 141.8280 0.1367 0.9511     0.5025 0.5130  73.1014 1.5955    0.2311
## 14 386.6055 155.6486 0.1244 0.9249     0.5275 0.0000      Inf    Inf    0.2273
## 15 390.1135 160.6107 0.1195 0.8833     0.5618 0.5147  46.1989 1.5730    0.2209
## 16 483.6394 179.5698 0.1106 0.8767     0.5698 0.6128  53.0787 1.0631    0.2152
## 17 495.9660 192.6246 0.0952 0.9081     0.5960 0.4691  46.4012 1.8808    0.2108
## 18 713.3914 221.6876 0.0998 0.9090     0.5959 0.7681  21.1397 0.5069    0.2052
## 19 730.0732 230.5732 0.0868 0.9202     0.5916 0.2529 118.1579 4.9062    0.2020
## 20 734.4826 238.0754 0.0833 0.9023     0.5936 0.6567  24.5654 0.8713    0.1976
##         Ball Ptbiserial    Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2  2265.2288     0.5763  0.9034  0.1984 0.0864  2e-04  2.8000 2.0929 0.7684
## 3   903.8184     0.6282 -0.1457  0.3799 0.0990  2e-04  2.2978 1.6224 0.4529
## 4   449.9959     0.6502  5.3030  0.3701 0.1302  2e-04  1.3926 1.3665 0.1947
## 5   311.5077     0.4575  0.0035  0.9424 0.1302  3e-04  2.6081 1.2529 0.2209
## 6   178.6179     0.4711  0.8973  0.9046 0.1302  3e-04  2.4546 1.0690 0.0813
## 7   131.7572     0.4343  1.5263  1.0996 0.1302  3e-04  2.6312 0.9758 0.0764
## 8   102.6843     0.3961  0.9056  1.3422 0.1302  3e-04  2.9788 0.8826 0.0875
## 9    83.1058     0.3681  0.2548  1.5477 0.1302  3e-04  3.2281 0.8217 0.0713
## 10   66.2561     0.3619  0.2639  1.5371 0.1741  3e-04  3.1527 0.7848 0.0660
## 11   55.0922     0.3564  0.3449  1.5191 0.1741  3e-04  3.1311 0.7519 0.0626
## 12   47.3790     0.3529  1.8785  1.5209 0.1741  3e-04  4.2546 0.7043 0.0572
## 13   42.4257     0.3443  0.2471  1.5945 0.1741  3e-04  4.4482 0.6553 0.0511
## 14   35.8972     0.3395  1.1192  1.5628 0.1741  3e-04  4.5615 0.6265 0.0468
## 15   32.4690     0.3251  0.0936  1.6770 0.1741  3e-04  4.7189 0.5818 0.0428
## 16   27.2258     0.3250  0.2425  1.6015 0.1741  3e-04  4.6139 0.5537 0.0414
## 17   23.8877     0.3176  0.0595  1.5257 0.1741  3e-04  4.7630 0.5320 0.0402
## 18   19.6029     0.3182  0.2710  1.4259 0.2000  3e-04  4.5073 0.5061 0.0363
## 19   17.8555     0.3110  0.3641  1.3509 0.2000  3e-04  5.1115 0.4799 0.0355
## 20   16.4282     0.3081  0.2782  1.3360 0.2000  3e-04  5.0785 0.4547 0.0334
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.6930           244.9257       0.1074
## 3          0.5991            92.3269       0.0010
## 4          0.6805           202.3331       0.5515
## 5          0.5857            84.8793       0.0081
## 6          0.6431           130.9672       0.4172
## 7          0.6280           114.3488       0.0632
## 8          0.6108           100.0491       0.2627
## 9          0.5797            81.9449       0.4379
## 10         0.5563            72.5697       0.4994
## 11         0.5088            59.8593       0.2956
## 12         0.5265            63.8404       0.0000
## 13         0.5367            66.4787       0.1912
## 14         0.5928            88.6202       0.0000
## 15         0.4752            54.1058       0.1984
## 16         0.5471            69.5362       0.3653
## 17         0.4474            50.6396       0.1363
## 18         0.5247            63.3993       0.6779
## 19         0.4434            50.2168       0.0030
## 20         0.4689            53.2292       0.4577
## 
## $Best.nc
##                      KL       CH Hartigan    CCC    Scott   Marriot  TrCovW
## Number_clusters  4.0000    6.000   4.0000  2.000   3.0000         6       3
## Value_Index     59.3874 1059.812 242.8529 40.761 605.6494 347056632 2951339
##                  TraceW Friedman    Rubin  Cindex     DB Silhouette   Duda
## Number_clusters   3.000  18.0000  18.0000 20.0000 4.0000     17.000 4.0000
## Value_Index     907.531 217.4254 -20.1773  0.0833 0.6902      0.596 0.7079
##                 PseudoT2  Beale Ratkowsky    Ball PtBiserial Frey McClain Dunn
## Number_clusters   4.0000 2.0000    5.0000    3.00     4.0000    1  2.0000 18.0
## Value_Index     177.8703 2.0322    0.2803 1361.41     0.6502   NA  0.1984  0.2
##                 Hubert SDindex Dindex    SDbw
## Number_clusters      0  4.0000      0 20.0000
## Value_Index          0  1.3926      0  0.0334
## 
## $Best.partition
##   [1] 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 1 2 3 3 2 1 1 3 1 2 2 1 1 4 1 1 3 1
##  [38] 2 1 3 3 3 1 3 2 3 1 1 3 2 1 1 2 1 1 3 3 1 1 3 2 3 1 3 1 2 2 1 2 1 1 3 3 2
##  [75] 2 2 3 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 3 1 4 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 2 1 1 3 3 3 3 3 3 3 3 2 3 1
## [149] 3 3 2 1 1 3 3 3 3 3 1 3 3 3 3 1 3 1 1 1 3 3 3 3 2 3 3 1 1 2 1 1 1 1 1 1 1
## [186] 1 1 3 3 3 3 2 1 3 1 3 3 1 1 1 2 1 4 1 2 1 1 2 3 2 2 2 1 2 3 1 2 2 1 2 2 1
## [223] 3 1 2 1 1 4 2 1 1 2 4 2 1 2 3 2 1 2 2 2 4 1 1 2 2 2 1 2 2 2 2 2 2 1 2 3 3
## [260] 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 3 2 2 1 1 2 2 2 1 2 2 2 1 2 2 1 1 1 2
## [297] 1 1 1 3 2 1 2 1 3 1 3 2 3 3 3 1 3 2 1 3 3 1 1 1 3 1 3 3 1 2 2 3 1 3 1 2 1
## [334] 3 3 1 3 3 1 1 1 2 1 1 1 1 1 1 1 2 1 3 1 1 1 1 1 1 3 1 1 2 3 3 3 2 3 3 2 2
## [371] 3 1 1 3 3 1 4 1 4 4 1 4 4 1 4 2 4 1 4 1 4 1 4 1 4 4 4 1 3 1 2 2 3 1 1 3 1
## [408] 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [482] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 1 3 1 1 1 3 1 1 1 1 4 1 3 1 1 1 1 1 4
## [519] 1 1 1 1 1 4 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [556] 1 1 2 1 3 2 1 2 2 2 1 3 1 1 1 1 1 1 3 1 1 1 1 3 3 3 2 3 2 1 3 1 1 1 2 1 3
## [593] 3 2 1 1 1 2 1 1 3 1 2 2 1 1 4 1 4 1 1 1 1 1 2 2 2 2 1 2 1 2 3 1 1 1 1 1 1
## [630] 1 2 1 2 3 1 3 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 3 1 1 1 1
## [667] 1 1 1 3 1 3 1 2 1 2 3 1 3 1 2 1 3 1 1 2 2 1 3 3 1 1 1 2 1
# kmodes() starts from randomly chosen rows, so fix the seed to make the
# cluster assignment (and the table printed below) reproducible.
set.seed(42)

# Four clusters, following the NbClust majority-rule conclusion above.
kmode.4 <- kmodes(michelin.kmodes,
                  modes = 4,
                  iter.max = 10,
                  weighted = FALSE)

# Human-readable description of each cluster's modal restaurant: map the
# cuisine code back to its label and render the price tier as "$" signs.
cluster.info <- kmode.4$modes %>%
  mutate(
    cuisine.collapsed = levels(michelin$cuisine.collapsed)[cuisine.collapsed],
    price = strrep("$", price)
  )

# Attach each restaurant's cluster id to the main table.
michelin$cluster <- kmode.4$cluster
cluster.info
##   cuisine.collapsed price stars
## 1           Chinese   $$$     1
## 2            Modern   $$$     1
## 3          Japanese  $$$$     1
## 4            Modern  $$$$     1

4 ggplot2 Plots

# Pairs plot of the three clustering features, coloured by cluster.
plot(michelin.kmodes, col = kmode.4$cluster)

# Cluster ids are labels, not quantities: store them as a factor so ggplot2
# uses a discrete colour scale instead of a continuous gradient.
michelin.clustered <- michelin.kmodes %>%
  mutate(cluster = factor(kmode.4$cluster))

# All three features take only a few integer values, so points are jittered
# to reveal how many restaurants share each combination. Each observation is
# drawn once (a separate un-jittered layer would plot every point twice).
ggplot(michelin.clustered, aes(x = stars, y = price, color = cluster)) +
  geom_jitter(size = 2)

ggplot(michelin.clustered,
       aes(x = cuisine.collapsed, y = price, color = cluster)) +
  geom_jitter(size = 2)

ggplot(michelin.clustered,
       aes(x = cuisine.collapsed, y = stars, color = cluster)) +
  geom_jitter(size = 2)

5 Plotly Plots

# Interactive 3D scatter of cuisine x price x stars, coloured by cluster.
# Hovering a marker shows a one-line description of the restaurant.
michelin.3d <- michelin.kmodes %>%
  mutate(cluster = kmode.4$cluster) %>%
  plot_ly(x = .$cuisine.collapsed,
          y = .$price,
          z = .$stars,
          type = "scatter3d",
          mode = "markers",
          color = .$cluster,
          showlegend = FALSE,
          hoverinfo = "text",
          hovertext = paste(
            michelin$name, ":",
            # michelin$price is a factor; decode its label so the number of
            # "$" signs is the real price tier, not the factor's level code.
            "A", strrep("$", as.integer(as.character(michelin$price))),
            michelin$stars, "Michelin star",
            michelin$cuisine.collapsed, "restaurant",
            "in", michelin$city
          ))
michelin.3d

# Write the widget to an HTML file and embed that file in the document.
saveWidget(michelin.3d, "michelin_3d.html")

htmltools::tags$iframe(
  src = "michelin_3d.html",
  scrolling = "no",
  seamless = "seamless",
  frameBorder = "0"
)
# Use the Mapbox token already present in the environment (e.g. from
# .Renviron) and only fall back to the bundled one when none is set, so a
# user-supplied token is never overwritten.
# NOTE(review): an access token is committed in source here; consider
# rotating it and supplying MAPBOX_TOKEN through the environment only.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv("MAPBOX_TOKEN" = "pk.eyJ1IjoicnlhbmNhaGlsZGVicmFuZHQiLCJhIjoiY2tiNWd0MzJmMTN5MzJybXZ0cnp2N2c0MSJ9.qh0GjKns3qfkdZFxLlG4Lw")
}

# michelin$price is a factor whose labels are the number of "$" signs.
# Decode the labels (not the level codes) so marker size and hover text
# reflect the real price tier even if some tier is absent from the data.
michelin.price.tier <- as.integer(as.character(michelin$price))

# World map with one marker per restaurant: colour encodes the star rating
# and size grows steeply (fifth power) with the price tier.
michelin.map <- plot_mapbox(maps::world.cities) %>%
  add_markers(
    x = michelin$longitude,
    y = michelin$latitude,
    size = michelin.price.tier^5,
    color = michelin$stars,
    hoverinfo = "text",
    hovertext = paste(
      michelin$name, ":",
      "A", strrep("$", michelin.price.tier),
      michelin$stars, "Michelin star",
      michelin$cuisine.collapsed, "restaurant",
      "in", michelin$city
    ))
michelin.map

# Write the widget to an HTML file and embed that file in the document.
saveWidget(michelin.map, "michelin_map.html")

htmltools::tags$iframe(
  src = "michelin_map.html",
  scrolling = "no",
  seamless = "seamless",
  frameBorder = "0"
)